*===================================================
* TABLE 1, COLS 2-4
*===================================================

* ---- analysis parameters --------------------------------------------------
* NOTE(review): 14943 is presumably a Stata daily date (days since 01jan1960) - confirm
local policy_date     = 14943							// day number of the policy change
local citation_window = 3650							// maximum priority-to-priority lag
local window_size     = 7								// width of one time window in the repeated cross-sections
local window_num      = 26								// number of time windows on each side of the policy change
local window          = `window_num'*`window_size'		// cohort window size implied by the two values above


* ---- folder locations (placeholders to be edited by the user) -------------
global DATA  "path/to/input/data"
global INT "path/to/intermediate/files"
global OUT "path/to/output/folder"

// trend removal
//
// For each seed from 1 to 1000: stack the twincites and USEPcites files, keep
// one citation per (twin, family_cited, nonJP), keep only cited families that
// still appear more than once, then randomly thin the larger of the twin and
// non-twin groups within each first-listed WIPO field so that the two groups
// have equal expected size. Each sample is saved under INT for the regressions.
//
// Reproducibility: ordering never relies on how sort breaks ties, so a given
// seed always yields the same sample.


forvalues seed=1/1000 {
	* stack the two prepared files; twin records which file a row came from
	use "$DATA/twincites_applicant_did_prep.dta", clear
	gen twin=1
	append using "$DATA/USEPcites_applicant_did_prep.dta"
	replace twin=0 if twin==.

	* note: a missing backcite_count_citing compares as larger than 100, so those rows go too
	drop if backcite_count_citing>100

	* keep the earliest citing record per (twin, family_cited, nonJP);
	* stable keeps tied rows in file order so the retained row is deterministic
	sort twin family_cited priority_citing_d, stable
	duplicates drop twin family_cited nonJP, force

	* keep only cited families that are still present in more than one row
	duplicates tag family_cited, gen(tag)
	drop if tag==0
	drop tag

	* one uniform draw per cited family, so a family is kept or dropped as a whole.
	* The family takes the draw of its member with the smallest rand1; sorting on
	* rand1 inside bysort makes that choice explicit instead of tie-dependent.
	set seed `seed'
	gen rand=runiform()
	gen rand1=runiform()
	bysort family_cited (rand1): replace rand=rand[1]
	drop rand1

	* balance twin and non-twin counts within each first-listed WIPO field
	split wipo_field_broad_orig_cited, p(;)
	levelsof wipo_field_broad_orig_cited1, local(wipo)
	foreach cat in `wipo' {
		* counts go into plain locals: the field label is never part of a macro name
		quietly summarize family_cited if wipo_field_broad_orig_cited1=="`cat'" & twin==0
		local n_nontwin=r(N)
		quietly summarize family_cited if wipo_field_broad_orig_cited1=="`cat'" & twin==1
		local n_twin=r(N)

		* thin whichever group is larger down to the expected size of the smaller one
		if `n_nontwin'>`n_twin' {
			local ratio=`n_twin'/`n_nontwin'
			drop if rand>`ratio' & wipo_field_broad_orig_cited1=="`cat'" & twin==0
		}
		if `n_twin'>`n_nontwin' {
			local ratio=`n_nontwin'/`n_twin'
			drop if rand>`ratio' & wipo_field_broad_orig_cited1=="`cat'" & twin==1
		}
	}
	drop wipo_field_broad_orig_cited? rand

	save "$INT/twin_USEP_appended_applicant_did_prep_`seed'.dta", replace
}

// Triple-difference regression, estimated once on each of the 1000 saved samples.
// Every pass appends one row to matrix C:
//   seed, treated, twinXtime, twinXUS, USXtime, nonJP, twin, science_cites,
//   originality, inventors_citing, constant, N, lincom(treated+twinXtime), R2

local policy_date     = 14943							// policy change date in days
local citation_window = 3650							// max priority-priority lag
local window_size     = 7								// size of time window for repeated cross-sectional analysis
local window_num      = 26								// number of time windows either side of policy change
local window          = `window_num'*`window_size'		// resulting cohort window size

local controls_fes="science_cites originality inventors_citing i.wipo_citing_broad i.wipo_cited_broad "

* placeholder first row of missings; later code removes it by testing C1
matrix C = J(1, 14, .)

estimates clear
forvalues seed = 1/1000 {
	* load the sample built for this seed
	use "$INT\twin_USEP_appended_applicant_did_prep_`seed'.dta", clear

	* index of the time window relative to the policy date, shifted to start at zero
	generate temp = floor((US_filing_date_d_cited-`policy_date')/`window_size')
	quietly summarize temp
	generate time = temp-r(min)

	* rebuild the triple interaction and the three pairwise interactions
	drop treated
	generate treated = after_change_cited*nonJP*twin
	replace treated = 0 if treated==.
	generate twinXtime = twin*after_change_cited
	generate twinXUS   = twin*nonJP
	generate USXtime   = nonJP*after_change_cited

	display "twin: triple diff"
	regress lag treated twinXtime twinXUS USXtime nonJP twin `controls_fes' i.time, vce(cluster inventor_country_citing_e)

	* sample size and fit
	local nobs = e(N)
	local rsq  = e(r2)

	* coefficient row of r(table); the first nine columns follow the regressor order above
	return list
	matrix tab = r(table)
	local b_treated   = tab[1,1]
	local b_twinXtime = tab[1,2]
	local b_twinXUS   = tab[1,3]
	local b_USXtime   = tab[1,4]
	local b_nonJP     = tab[1,5]
	local b_twin      = tab[1,6]
	local b_sci       = tab[1,7]
	local b_orig      = tab[1,8]
	local b_inv       = tab[1,9]

	* the constant sits in the last column
	local last   = colsof(tab)
	local b_cons = tab[1,`last']

	* combined coefficient treated + twinXtime
	lincom treated+twinXtime
	local combined = r(estimate)

	matrix C = C \ (`seed', `b_treated', `b_twinXtime', `b_twinXUS', `b_USXtime', `b_nonJP', `b_twin', `b_sci', `b_orig', `b_inv', `b_cons', `nobs', `combined', `rsq')

}


// run standard reg with USEP twins
//
// Standard difference-in-differences on the rows with twin==0 (the rows that
// were appended from the USEPcites file), estimated once per saved sample.
// Every pass appends one row to matrix D:
//   seed, treated, nonJP, science_cites, originality, inventors_citing, constant, N, R2


local cite_set="applicant"				//choose which set of citations to examine
local policy_date=14943							//policy change date in days 
local citation_window=3650						//max priority-priority lag
local window_size=7								//size of time window for repeated cross-sectional analysis
local window_num=26							//number of times windows either side of policy change
local window=`window_num'*`window_size'			//resulting cohort window size						
local country="US"								//select non-JP citing country

local controls_fes="science_cites originality inventors_citing i.wipo_citing_broad i.wipo_cited_broad "

* placeholder first row of missings; later code removes it by testing D1
matrix D=[.,.,.,.,.,.,.,.,.]

est clear 
forvalues seed=1/1000 {
	*take base set
	use "$INT/twin_USEP_appended_applicant_did_prep_`seed'.dta", clear
	drop if twin==1
	drop twin

	*apply special restrictions for this analysis (as baseline, check if control variables are nonmissing)
	* NOTE(review): backcite_count_citing is logged and scaled here but is not a regressor below
	
	replace backcite_count_citing=ln(backcite_count_citing+1)
	quietly: sum backcite_count_citing
	replace backcite_count_citing=(backcite_count_citing)/r(sd)

	*set up time parameter
	gen temp=floor((US_filing_date_d_cited-`policy_date')/`window_size')
	quietly: sum temp
	gen time=temp-r(min)
	
	* DID interaction; rows where the product is missing are set to untreated
	drop treated
	gen treated=after_change_cited*nonJP
	replace treated=0 if treated==.
	
	* log label corrected: this section is the US-EP DID, not the triple diff
	display "US-EP: standard DID"
	reg lag treated nonJP `controls_fes' i.time , vce(cluster inventor_country_citing_e)
	
	local N=e(N)
	local R2=e(r2)
	return li 
	* first row of r(table) holds the coefficients, in regressor order
	matrix tab=r(table)	
	local treated_coef=tab[1,1]
	local us=tab[1,2]
	local sci=tab[1,3]
	local orig=tab[1,4]
	local inv=tab[1,5]
	* the constant sits in the last column
	local cols=colsof(tab)
	local cons=tab[1,`cols']
	matrix D=D\[`seed',`treated_coef', `us', `sci', `orig',`inv', `cons', `N', `R2']
	
}

// Run standard DID on reduced base set
//
// Standard difference-in-differences on the rows with twin==1 (the rows that
// came from the twincites file), estimated once per saved sample.
// Every pass appends one row to matrix E:
//   seed, treated, nonJP, science_cites, originality, inventors_citing, constant, N, R2


local cite_set="applicant"				//choose which set of citations to examine
local policy_date=14943							//policy change date in days 
local citation_window=3650						//max priority-priority lag
local window_size=7								//size of time window for repeated cross-sectional analysis
local window_num=26							//number of times windows either side of policy change
local window=`window_num'*`window_size'			//resulting cohort window size						
local country="US"								//select non-JP citing country

local controls_fes="science_cites originality inventors_citing i.wipo_citing_broad i.wipo_cited_broad "

* placeholder first row of missings; later code removes it by testing E1
matrix E=[.,.,.,.,.,.,.,.,.]

est clear 
forvalues seed=1/1000 {
	*take base set
	use "$INT/twin_USEP_appended_applicant_did_prep_`seed'.dta", clear
	drop if twin==0
	drop twin

	*apply special restrictions for this analysis (as baseline, check if control variables are nonmissing)
	* NOTE(review): backcite_count_citing is logged and scaled here but is not a regressor below
	
	replace backcite_count_citing=ln(backcite_count_citing+1)
	quietly: sum backcite_count_citing
	replace backcite_count_citing=(backcite_count_citing)/r(sd)

	*set up time parameter
	gen temp=floor((US_filing_date_d_cited-`policy_date')/`window_size')
	quietly: sum temp
	gen time=temp-r(min)
	
	* DID interaction; rows where the product is missing are set to untreated
	drop treated
	gen treated=after_change_cited*nonJP
	replace treated=0 if treated==.
	
	* log label corrected: this section is the reduced-base-set DID, not the triple diff
	display "reduced base set: standard DID"
	* controls are listed once; the earlier duplicate list added only redundant terms
	reg lag treated nonJP `controls_fes' i.time, vce(cluster inventor_country_citing_e)
	
	local N=e(N)
	local R2=e(r2)
	return li 
	* first row of r(table) holds the coefficients, in regressor order
	matrix tab=r(table)	
	local treated_coef=tab[1,1]
	local us=tab[1,2]
	local sci=tab[1,3]
	local orig=tab[1,4]
	local inv=tab[1,5]
	* the constant sits in the last column
	local cols=colsof(tab)
	local cons=tab[1,`cols']
	matrix E=E\[`seed',`treated_coef', `us', `sci', `orig',`inv', `cons', `N',`R2']
	
}


* ---- summarise the US-EP DID replications held in matrix D -----------------
* Columns: D1 seed, D2-D7 coefficients, D8 N, D9 R2.
* svmat2 is a community-contributed command and must be installed beforehand.
clear
svmat2 D
drop if D1==.		// remove the placeholder row of missings

* For each coefficient: mean across replications, and the standard deviation
* across replications used as its standard error. r(sd) divides by the number
* of nonmissing replications minus one, so no replication count is hard-coded.
foreach v of varlist D2-D7 {
	quietly summarize `v'
	local mean_`v'=r(mean)
	local se_`v'=r(sd)
	local z_`v'=abs((`mean_`v''/`se_`v''))
	* closed-form approximation to the two-sided normal p-value, kept as is.
	* NOTE(review): 2*normal(-z) would give the exact value
	local p_`v'=exp(-0.717*`z_`v''-0.416*(`z_`v'')^2)
	local l95_`v'=`mean_`v''-1.96*`se_`v''
	local h95_`v'=`mean_`v''+1.96*`se_`v''
}

* average sample size and R2 across replications
quietly summarize D8
local N_D8=r(mean)

quietly summarize D9
local R_D9=r(mean)

* ---- summarise the reduced-base-set DID replications held in matrix E ------
* Columns: E1 seed, E2-E7 coefficients, E8 N, E9 R2.
* svmat2 is a community-contributed command and must be installed beforehand.
clear
svmat2 E
drop if E1==.		// remove the placeholder row of missings

* For each coefficient: mean across replications, and the standard deviation
* across replications used as its standard error. r(sd) divides by the number
* of nonmissing replications minus one, so no replication count is hard-coded.
foreach v of varlist E2-E7 {
	quietly summarize `v'
	local mean_`v'=r(mean)
	local se_`v'=r(sd)
	local z_`v'=abs((`mean_`v''/`se_`v''))
	* closed-form approximation to the two-sided normal p-value, kept as is.
	* NOTE(review): 2*normal(-z) would give the exact value
	local p_`v'=exp(-0.717*`z_`v''-0.416*(`z_`v'')^2)
	local l95_`v'=`mean_`v''-1.96*`se_`v''
	local h95_`v'=`mean_`v''+1.96*`se_`v''
}

* average sample size and R2 across replications
quietly summarize E8
local N_E8=r(mean)
quietly summarize E9
local R_E9=r(mean)

* ---- summarise the triple-diff replications held in matrix C ---------------
* Columns: C1 seed, C2-C11 coefficients, C12 N, C13 lincom(treated+twinXtime), C14 R2.
* svmat2 is a community-contributed command and must be installed beforehand.
clear
svmat2 C
drop if C1==.		// remove the placeholder row of missings

* For each column: mean across replications, plus the standard deviation and
* variance across replications. r(sd) and r(Var) divide by the number of
* nonmissing replications minus one, so no replication count is hard-coded.
* C12 (N) is summarised here as well; only its mean is reported further down.
foreach v of varlist C2-C13 {
	quietly summarize `v'
	local mean_`v'=r(mean)
	local se_`v'=r(sd)
	local var_`v'=r(Var)
	local z_`v'=abs((`mean_`v''/`se_`v''))
	* closed-form approximation to the two-sided normal p-value, kept as is.
	* NOTE(review): 2*normal(-z) would give the exact value
	local p_`v'=exp(-0.717*`z_`v''-0.416*(`z_`v'')^2)
	local l95_`v'=`mean_`v''-1.96*`se_`v''
	local h95_`v'=`mean_`v''+1.96*`se_`v''
}

* average sample size and R2 across replications
quietly summarize C12
local N_C12=r(mean)
quietly summarize C14
local R_C14=r(mean)

* ---- write the three summary tables to a plain-text file -------------------
* Reads the mean_, p_, l95_, h95_ and var_ locals plus the N_ and R_ locals
* that the summary sections above store for matrices D, E and C.

* close a handle left open by an earlier aborted run, otherwise file open fails
capture file close usep
file open usep using "$OUT/usep_regs.txt", write replace

* US-EP DID (matrix D)
file write usep "US-EP DID" _n
file write usep "Order: us_time, us, sci, orig, inv, cons, N, R2" _n
file write usep  "=============" _n
foreach v in D2 D3 D4 D5 D6 D7 {
	file write usep  "`v': " _n
	file write usep  "Mean: " (round(`mean_`v'', 0.0001)) _n
	file write usep  "p: " (round(`p_`v'', 0.0001)) _n
	file write usep  "Conf 95: ["     
	file write usep (round(`l95_`v'', 0.001))
	file write usep ", "
	file write usep (round(`h95_`v'', 0.001))
	file write usep "]" _n
	file write usep  "=============" _n
}
file write usep  "N: " (round(`N_D8', 0.1)) _n
file write usep  "R2: " (round(`R_D9', 0.001)) _n _n

* reduced base set DID (matrix E)
file write usep "Reduced base set DID" _n
file write usep "Order: us_time, us, sci, orig, inv, cons, N, R2" _n
file write usep  "=============" _n
foreach v in E2 E3 E4 E5 E6 E7 {
	file write usep  "`v': " _n
	file write usep  "Mean: " (round(`mean_`v'', 0.0001)) _n
	file write usep  "p: " (round(`p_`v'', 0.0001)) _n
	file write usep  "Conf 95: ["     
	file write usep (round(`l95_`v'', 0.001))
	file write usep ", "
	file write usep (round(`h95_`v'', 0.001))
	file write usep "]" _n
	file write usep  "=============" _n
}
file write usep  "N: " (round(`N_E8', 0.1)) _n 
file write usep  "R2: " (round(`R_E9', 0.001)) _n _n

* triple diff (matrix C); C12 is N and is written after the loop, C13 is the lincom estimate
file write usep  "Triple diff" _n
file write usep "Order: triple_coef, twinXtime, twinXUS, us_time, us, twin, sci, orig, inv, cons, synthetic_statistic, N, R2" _n
file write usep  "=============" _n
foreach v in C2 C3 C4 C5 C6 C7 C8 C9 C10 C11 C13 {
	file write usep  "`v': " _n
	file write usep  "Mean: " (round(`mean_`v'', 0.0001)) _n
	file write usep "Variance: " (round(`var_`v'', 0.000001)) _n
	file write usep  "p: " (round(`p_`v'', 0.0001)) _n
	file write usep  "Conf 95: ["     
	file write usep (round(`l95_`v'', 0.001))
	file write usep ", "
	file write usep (round(`h95_`v'', 0.001))
	file write usep "]" _n
	file write usep  "=============" _n
}
file write usep  "N: " (round(`N_C12', 0.1)) _n 
file write usep  "R2: " (round(`R_C14', 0.001)) _n _n
file close usep
